package org.test.hadoop.mapreduce;

import java.io.IOException;
import java.util.List;

import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

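/**
 * Map phase of a Chinese word-count job: each input line is segmented into
 * terms with the ansj NlpAnalysis tokenizer, and every term is emitted with
 * a count of 1 for the reduce phase to sum.
 */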
public class SplitWordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

	// Reusable writables: Hadoop serializes the key/value on each write,
	// so a single instance can safely be reused across map() calls.
	private static final IntWritable ONE = new IntWritable(1);
	private final Text word = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Skip null or blank lines.
		if (value == null || value.toString().trim().isEmpty()) {
			return;
		}
		// Segment the line into terms with the ansj NLP tokenizer.
		List<Term> terms = NlpAnalysis.parse(value.toString());
		for (Term term : terms) {
			// Emit <term, 1>; the reducer sums these counts per term.
			word.set(term.getName());
			context.write(word, ONE);
		}
	}

}
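
/*
 * A minimal driver sketch, not part of the original class, showing one way this
 * mapper could be wired into a word-count job. It assumes Hadoop's built-in
 * IntSumReducer as combiner and reducer, and takes the input and output paths
 * from the command line (args[0] and args[1]); fully qualified names are used
 * so the imports above stay untouched.
 */
class SplitWordCountJobSketch {

	public static void main(String[] args) throws Exception {
		org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
		org.apache.hadoop.mapreduce.Job job = org.apache.hadoop.mapreduce.Job.getInstance(conf, "split word count");
		job.setJarByClass(SplitWordCountMap.class);
		job.setMapperClass(SplitWordCountMap.class);
		// IntSumReducer adds up the 1s emitted by the mapper for each term.
		job.setCombinerClass(org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer.class);
		job.setReducerClass(org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// args[0] = input path, args[1] = output path (must not exist yet).
		org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job,
				new org.apache.hadoop.fs.Path(args[0]));
		org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job,
				new org.apache.hadoop.fs.Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}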
